#### Code to process risk score data from SRRTs and CMS State-level risk score data
	# 2nd part of code processes the geographic cost factors

year_to_run <- 2019

#### NOTE: you have to use 32-bit R to run this (problem with rJava)

setwd("C:/Users/eas24f/OneDrive - Florida State University/Research/Inertia RESTAT/Data/Rate Filings") 

# Load Packages
library(XLConnect)
library(xtable)
library(scales)
library(stargazer)

# Prepare Risk Score Data

	# Blue Shield is different from the others
	
		# Initialize rsdata
		member_data <- readWorksheetFromFile("2020_SRRT_Blue_Shield.xlsx",sheet=6,region = 'D16:I36',header=TRUE)
		score_data <- readWorksheetFromFile("2020_SRRT_Blue_Shield.xlsx",sheet=6,region = 'K16:P36',header=TRUE)
		
		rownames(member_data)[nrow(member_data)] <- "Total" 
		rownames(score_data)[nrow(member_data)] <- "Total"
		
		rsdata <- cbind(2020,"Blue_Shield",expand.grid(rownames(member_data),colnames(member_data)),as.vector(as.matrix(score_data)),as.vector(as.matrix(member_data)))
		colnames(rsdata) <- c("year","insurer","rating_area","metal","risk_score","member_months")
	
		# Add 2016-2019
		for(t in c(2016:2019)) {
			if (t == 2019) {
				member_data <- readWorksheetFromFile("2016-2019 Blue Shield Risk Scores.xlsx",sheet=(t-2016+1),region = 'D16:I36',header=TRUE)
				score_data <- readWorksheetFromFile("2016-2019 Blue Shield Risk Scores.xlsx",sheet=(t-2016+1),region = 'K16:P36',header=TRUE)
			} else {
				member_data <- readWorksheetFromFile("2016-2019 Blue Shield Risk Scores.xlsx",sheet=(t-2016+1),region = 'D14:I34',header=TRUE)
				score_data <- readWorksheetFromFile("2016-2019 Blue Shield Risk Scores.xlsx",sheet=(t-2016+1),region = 'K14:P34',header=TRUE)
			}
			
			rownames(member_data)[nrow(member_data)] <- "Total" 
			rownames(score_data)[nrow(member_data)] <- "Total"
			
			to_add <- cbind(t,"Blue_Shield",expand.grid(rownames(member_data),colnames(member_data)),
				as.vector(as.matrix(score_data)),as.vector(as.matrix(member_data)))
			colnames(to_add) <- colnames(rsdata)
			rsdata <- rbind(rsdata,to_add)
		}
	
	# 2014
	
		insurers <- c("Anthem","Kaiser","Molina","Valley","Chinese_Community","Sharp")

		for(f in insurers) {
		
			filename <- paste("2014_SRRT_",f,".xlsx",sep="")
		
			if(f == "Kaiser"){
				member_data <- readWorksheetFromFile(filename,sheet=7,region = 'D14:I34',header=TRUE)
				score_data <- readWorksheetFromFile(filename,sheet=7,region = 'K14:P34',header=TRUE)
			} else {
				member_data <- readWorksheetFromFile(filename,sheet=5,region = 'D14:I34',header=TRUE)
				score_data <- readWorksheetFromFile(filename,sheet=5,region = 'K14:P34',header=TRUE)
			}
			if(nrow(score_data) == 20) {
				rownames(member_data)[nrow(member_data)] <- "Total" 
				rownames(score_data)[nrow(member_data)] <- "Total"
			}
			
			member_data <- member_data[!is.na(member_data[,"Silver"]),]
			score_data <- score_data[!is.na(score_data[,"Silver"]),]
			
			to_add <- cbind(2014,f,expand.grid(rownames(member_data),colnames(member_data)),
				as.vector(as.matrix(score_data)),as.vector(as.matrix(member_data)))
			colnames(to_add) <- colnames(rsdata)
			to_add <- to_add[to_add$member_months > 0,]
			rsdata <- rbind(rsdata,to_add)
		}
	
	
	# 2015
	
		insurers <- c("Health_Net_HMO","Kaiser","Chinese_Community","Sharp","Western")

		for(f in insurers) {
		
			filename <- paste("2015_SRRT_",f,".xlsx",sep="")
		
			member_data <- readWorksheetFromFile(filename,sheet=6,region = 'D14:I34',header=TRUE)
			score_data <- readWorksheetFromFile(filename,sheet=6,region = 'K14:P34',header=TRUE)
			if(nrow(score_data) == 20) {
				rownames(member_data)[nrow(member_data)] <- "Total" 
				rownames(score_data)[nrow(member_data)] <- "Total"
			}
			
			member_data <- member_data[!is.na(member_data[,"Silver"]),]
			score_data <- score_data[!is.na(score_data[,"Silver"]),]
			
			to_add <- cbind(2015,f,expand.grid(rownames(member_data),colnames(member_data)),
				as.vector(as.matrix(score_data)),as.vector(as.matrix(member_data)))
			colnames(to_add) <- colnames(rsdata)
			to_add <- to_add[to_add$member_months > 0,]
			rsdata <- rbind(rsdata,to_add)
		}
	
	
	# 2016
		
		insurers <- c("Anthem","Kaiser","Health_Net_HMO",
			"Valley","Molina","Oscar","Chinese_Community","Sharp")

		for(f in insurers) {
		
			filename <- paste("2016_SRRT_",f,".xlsx",sep="")
		
			if(f == "Health_Net_HMO") {
				member_data <- readWorksheetFromFile(paste("2016_SRRT_Health_Net_HMO.xlsx",sep=""),sheet=5,region = 'D14:I34',header=TRUE)
				score_data <- readWorksheetFromFile(paste("2016_SRRT_Health_Net_HMO.xlsx",sep=""),sheet=5,region = 'K14:P34',header=TRUE)
			} else {
				member_data <- readWorksheetFromFile(filename,sheet=6,region = 'D14:I34',header=TRUE)
				score_data <- readWorksheetFromFile(filename,sheet=6,region = 'K14:P34',header=TRUE)
			}
			if(nrow(score_data) == 20) {
				rownames(member_data)[nrow(member_data)] <- "Total" 
				rownames(score_data)[nrow(member_data)] <- "Total"
			}
			
			member_data <- member_data[!is.na(member_data[,"Silver"]),]
			score_data <- score_data[!is.na(score_data[,"Silver"]),]
			if (f == "Kaiser") {
				score_data <- score_data[score_data$Silver > 0,]
			}
			
			to_add <- cbind(2016,f,expand.grid(rownames(member_data),colnames(member_data)),
				as.vector(as.matrix(score_data)),as.vector(as.matrix(member_data)))
			colnames(to_add) <- colnames(rsdata)
			to_add <- to_add[to_add$member_months > 0,]
			rsdata <- rbind(rsdata,to_add)
		}
	
		rsdata$risk_score <- as.numeric(rsdata$risk_score)
	
	# 2017
	
		insurers <- c("Anthem","Kaiser","Health_Net_HMO",
			"LA_Care","Molina","Oscar","Chinese_Community","Sharp","Western")

		for(f in insurers) {
		
			filename <- paste("2017_SRRT_",f,".xlsx",sep="")
		
			# Health Net's SRRT caused when reading in
			if(f == "Health_Net_HMO") {
				member_data <- readWorksheetFromFile(paste("2017_SRRT_Health_Net_HMO_Extract.xlsx",sep=""),sheet=1,region = 'A5:F25',header=TRUE)
				score_data <- readWorksheetFromFile(paste("2017_SRRT_Health_Net_HMO_Extract.xlsx",sep=""),sheet=1,region = 'H5:M25',header=TRUE)
			} else {
				member_data <- readWorksheetFromFile(filename,sheet=6,region = 'D14:I34',header=TRUE)
				score_data <- readWorksheetFromFile(filename,sheet=6,region = 'K14:P34',header=TRUE)
			}
			if(nrow(score_data) == 20) {
					rownames(member_data)[nrow(member_data)] <- "Total" 
					rownames(score_data)[nrow(member_data)] <- "Total"
			}
			member_data <- member_data[!is.na(member_data[,"Silver"]),]
			score_data <- score_data[!is.na(score_data[,"Silver"]),]
			
			to_add <- cbind(2017,f,expand.grid(rownames(member_data),colnames(member_data)),
				as.vector(as.matrix(score_data)),as.vector(as.matrix(member_data)))
			colnames(to_add) <- colnames(rsdata)
			to_add <- to_add[to_add$member_months > 0,]
			rsdata <- rbind(rsdata,to_add)
		}
		
		
		
			
	# 2018
	
		insurers <- c("Anthem","Health_Net_HMO","Health_Net_PPO","Kaiser",
			"LA_Care","Molina","Valley","Oscar","Chinese_Community","Sharp","Western")

		for(f in insurers) {
		
			filename <- paste("2018_SRRT_",f,".xlsx",sep="")
		
			member_data <- readWorksheetFromFile(filename,sheet=5,region = 'D14:I34',header=TRUE)
			score_data <- readWorksheetFromFile(filename,sheet=5,region = 'K14:P34',header=TRUE)
			if(nrow(score_data) == 20) {
				rownames(member_data)[nrow(member_data)] <- "Total" 
				rownames(score_data)[nrow(member_data)] <- "Total"
			}
			
			member_data <- member_data[!is.na(member_data[,"Silver"]),]
			score_data <- score_data[!is.na(score_data[,"Silver"]),]
			
			to_add <- cbind(2018,f,expand.grid(rownames(member_data),colnames(member_data)),
				as.vector(as.matrix(score_data)),as.vector(as.matrix(member_data)))
			colnames(to_add) <- colnames(rsdata)
			to_add <- to_add[to_add$member_months > 0,]
			rsdata <- rbind(rsdata,to_add)
		}
	
	# 2019
	
		insurers <- c("Anthem","Health_Net_HMO","Health_Net_PPO","Kaiser",
			"LA_Care","Molina","Valley","Oscar","Sharp","Western")

		for(f in insurers) {
		
			filename <- paste("2019_SRRT_",f,".xlsx",sep="")
		
			member_data <- readWorksheetFromFile(filename,sheet=6,region = 'D16:I36',header=TRUE)
			score_data <- readWorksheetFromFile(filename,sheet=6,region = 'K16:P36',header=TRUE)
			if(nrow(score_data) == 20) {
				rownames(member_data)[nrow(member_data)] <- "Total" 
				rownames(score_data)[nrow(member_data)] <- "Total"
			}
			
			member_data <- member_data[!is.na(member_data[,"Silver"]),]
			score_data <- score_data[!is.na(score_data[,"Silver"]),]
			
			to_add <- cbind(2019,f,expand.grid(rownames(member_data),colnames(member_data)),
				as.vector(as.matrix(score_data)),as.vector(as.matrix(member_data)))
			colnames(to_add) <- colnames(rsdata)
			to_add <- to_add[to_add$member_months > 0,]
			rsdata <- rbind(rsdata,to_add)
		}

	# 2020
	
		insurers <- c("Anthem","Health_Net_HMO","Health_Net_PPO",
			"LA_Care","Molina","Valley","Oscar","Chinese_Community","Sharp","Western")

		for(f in insurers) {
		
			filename <- paste("2020_SRRT_",f,".xlsx",sep="")
		
			member_data <- readWorksheetFromFile(filename,sheet=6,region = 'D16:I36',header=TRUE)
			score_data <- readWorksheetFromFile(filename,sheet=6,region = 'K16:P36',header=TRUE)
			if(nrow(score_data) == 20) {
				rownames(member_data)[nrow(member_data)] <- "Total" 
				rownames(score_data)[nrow(member_data)] <- "Total"
			}
			
			member_data <- member_data[!is.na(member_data[,"Silver"]),]
			score_data <- score_data[!is.na(score_data[,"Silver"]),]
			
			to_add <- cbind(2020,f,expand.grid(rownames(member_data),colnames(member_data)),
				as.vector(as.matrix(score_data)),as.vector(as.matrix(member_data)))
			colnames(to_add) <- colnames(rsdata)
			to_add <- to_add[to_add$member_months > 0,]
			rsdata <- rbind(rsdata,to_add)
		}

# Clean up metal and insurer variables so they matches demand data

	# Metal
	rsdata$metal <- as.character(rsdata$metal)
	rsdata[rsdata$metal == "Other","metal"] <- "Minimum Coverage"
	
	# Plan Type
	rsdata$plan_type <- "Both"
	rsdata[rsdata$insurer == "Health_Net_HMO","plan_type"] <- "HMO"
	rsdata[rsdata$insurer == "Health_Net_PPO","plan_type"] <- "PPO"
	
	# Insurer 
	rsdata$insurer <- as.character(rsdata$insurer)
	rsdata[rsdata$insurer %in% c("Health_Net_HMO","Health_Net_PPO"),"insurer"] <- "Health_Net"
	
	rownames(rsdata) <- paste(rsdata$insurer,rsdata$plan_type,rsdata$metal,rsdata$year,rsdata$rating_area,sep="_")

# Add AV

	rsdata$AV <- 0.57 # catastrophic
	rsdata[rsdata$metal == "Bronze","AV"] <- 0.6
	rsdata[rsdata$metal == "Silver","AV"] <- 0.7
	rsdata[rsdata$metal == "Gold","AV"] <- 0.8
	rsdata[rsdata$metal == "Platinum","AV"] <- 0.9
	

setwd("C:/Users/eas24f/OneDrive - Florida State University/Research/Inertia RESTAT/Data") 
write.csv(rsdata,"risk_score_data.csv")



##### Process Geographic Cost Factors from CMS

	setwd("C:/Users/eas24f/OneDrive - Florida State University/Research/Inertia RESTAT/Data/RA-RI-RC Data")

	years <- c(2014:2020)
	rating_areas <- c(1:19)
	gdata <- expand.grid(rating_area = rating_areas,year = years)
	rownames(gdata) <- paste(gdata$rating_area,gdata$year,sep="_")
	
	fields <- c("GCF","member_months")
	gdata[,fields] <- NA
	
	
	for(year in years) {
		
		filename <- paste(year,"_App_B.xlsx",sep="")
		if(year == 2014) {
			yeardata <- readWorksheetFromFile(filename,sheet=1,region = 'A47:C66',header=TRUE)
			yeardata$year <- year
			rownames(yeardata) <- paste(yeardata$Rating.Area,yeardata$year,sep="_")
			gdata[rownames(yeardata),fields] <- yeardata[,c("GCF","Billable.Member.Months")]
		} else if(year %in% c(2015,2017,2018,2019)) {
			yeardata <- readWorksheetFromFile(filename,sheet=1,region = 'A1:E50',header=TRUE)
			yeardata$year <- year
			if(year %in% c(2017,2018)) yeardata$Rating.Area <- yeardata$Exchange.Rating.Area
			yeardata$Rating.Area <- as.numeric(yeardata$Rating.Area)
			yeardata <- yeardata[yeardata$State == "CA",]
			rownames(yeardata) <- paste(yeardata$Rating.Area,yeardata$year,sep="_")
			gdata[rownames(yeardata),fields] <- yeardata[,c("GCF","Billable.Member.Months")]
		} else if(year %in% c(2016,2020)) {
			yeardata <- readWorksheetFromFile(filename,sheet=1,region = 'A1:D54',header=TRUE)
			yeardata$year <- year
			yeardata$Rating.Area <- as.numeric(yeardata$Rating.Area)
			yeardata <- yeardata[yeardata$State == "CA" & !is.na(yeardata$State),]
			rownames(yeardata) <- paste(yeardata$Rating.Area,yeardata$year,sep="_")
			gdata[rownames(yeardata),fields] <- yeardata[,c("GCF","Billable.Member.Months")]
		}
	}

	setwd("C:/Users/eas24f/OneDrive - Florida State University/Research/Inertia RESTAT/Data")
	write.csv(gdata,"ca_cms_geographic_factors.csv")
	
	# Save Table for Paper
	output <- gdata[gdata$year <= 2018,]
	output <- matrix(output[,"GCF"],nrow=19,ncol=5,byrow=FALSE)
	write.csv(output,"gcf_table.csv")
	